# GRADER_ONLY
# Accumulated autograder state shared by every grading cell in the notebook.
autograded_result = {
    "netid": None,
    "order": [],
    "student_score": 0,
    "total_available": 0,
    "breakdown": {},
}


def record_part(part_name, did_pass, available_points, message=""):
    """Record the outcome of one graded part and refresh the running totals.

    Args:
        part_name: Key identifying the part (e.g. "deliverable-01").
        did_pass: Whether the part's checks passed.
        available_points: Points the part is worth; awarded in full on a
            pass, zero otherwise.
        message: Optional failure detail. Grading cells pass the caught
            exception object here, so it is stored as text to keep the
            result plain data.

    Side effects:
        Mutates the module-level ``autograded_result`` and prints it.
    """
    graded_order = autograded_result["order"]

    # Re-grading a part replaces its earlier result, so its earlier position
    # in the order is dropped; otherwise "order" would list the part twice
    # while "breakdown" holds it only once.
    if part_name in graded_order:
        print(f"[Warning] Autograder: {part_name} already graded or a duplicate key exists")
        graded_order.remove(part_name)
    graded_order.append(part_name)

    autograded_result["breakdown"][part_name] = {
        "available_points": available_points,
        "points": available_points if did_pass else 0,
        "did_pass": did_pass,
        "message": str(message),
    }

    # Totals are always recomputed from the breakdown so that a re-graded
    # part is never counted twice.
    graded_parts = autograded_result["breakdown"].values()
    autograded_result["student_score"] = sum(part["points"] for part in graded_parts)
    autograded_result["total_available"] = sum(part["available_points"] for part in graded_parts)

    print(autograded_result)

There have been four Transportation Network Providers (often called rideshare companies 🚗) licensed to operate in Chicago. These rideshare companies are required to routinely report vehicles, drivers, and trips information to the City of Chicago, which are published to the Chicago Data Portal. The latest trips dataset can be downloaded at this page.
Analyze the dataset and answer the following questions:
▶️ Run the code cell below to import unittest, a module used for 🧭 Check Your Work sections and the autograder.
# DO NOT MODIFY THE CODE IN THIS CELL
import base64
import unittest
# Shared TestCase instance; the check cells use its assert* helpers.
tc = unittest.TestCase()
assignment_name = "case-study-03-ridesharing-trips-dataviz"

# The notebook is treated as running under the autograder when a name
# called `record_part` is present in the global namespace.
is_autograder_env = 'record_part' in globals()
if is_autograder_env:
    autograded_result["assignment_name"] = assignment_name
Replace the empty string assigned to netid with your NetID below.
netid = ""
print(f"Your NetID is {netid}.")
Your NetID is .
# GRADER_ONLY
# This code will be stripped to the student-distributed version
netid = "grader_netid"
If the code cell below doesn't throw an error, you're ready to begin this assignment.
# DO NOT CHANGE THE CODE BELOW
# Validate the NetID: it must be a non-empty string that is not purely digits.
tc.assertIsNotNone(netid)
tc.assertIsInstance(netid, str)
tc.assertNotEqual(netid.strip(), "", "Did you forget to enter your NetID?")
tc.assertFalse(netid.strip().isdecimal(), "Did you enter your UIN instead of your NetID?")

# Under the autograder, attach the NetID to the accumulated result.
if is_autograder_env:
    autograded_result["netid"] = netid
▶️ Run the code below to ensure you're using the correct version of plotly.
# Install plotly 5.3.1 using pip
# Colab environment supports pip
if 'google.colab' in str(get_ipython()):
!pip install plotly==5.3.1
# If you're using conda, use the code below
# !conda install -c plotly plotly=5.3.1
▶️ Run the code cell below to import packages used in the case.
import pandas as pd
import numpy as np
import plotly
import plotly.express as px
import plotly.graph_objects as go
# plotly.io is a low-level interface for interacting with figures
# plotly.io.templates lists available plotly templates
# https://plotly.com/python-api-reference/plotly.io.html
import plotly.io as pio
pd.set_option('display.max_columns', 50)
Run the code below to ensure that your notebook uses the same Plotly version as the autograder.
# DO NOT CHANGE THE CODE IN THIS CELL
# Report the installed plotly version, then require the 5.3 series.
installed_plotly_version = plotly.__version__
print(f'The current plotly version is {installed_plotly_version}')
tc.assertTrue(
    installed_plotly_version.startswith('5.3'),
    'Your plotly version should be 5.3.x',
)
The current plotly version is 5.3.1
▶️ Run the code below to import and process the trips dataset.
# Trips dataset: gzip-compressed CSV; the `start` column is parsed as datetimes.
df = pd.read_csv(
    'https://github.com/bdi475/datasets/raw/main/case-studies/chicago-ridesharing/chicago-ridesharing-trips-2019-2020.csv.gz',
    parse_dates=['start'],
    compression='gzip',
)

# Lookup table with `area_number` and `community` columns.
df_community_areas = pd.read_csv('https://github.com/bdi475/datasets/raw/main/case-studies/chicago-ridesharing/chicago-community-area-numbers.csv')

# Swap the pickup area numbers for area names: left-join the lookup table,
# overwrite the numeric column in place, then drop the join helper columns.
df = (
    df.merge(df_community_areas, how='left', left_on='pickup_area', right_on='area_number')
    .assign(pickup_area=lambda merged: merged['community'])
    .drop(columns=['community', 'area_number'])
)

# Same replacement for the dropoff area numbers.
df = (
    df.merge(df_community_areas, how='left', left_on='dropoff_area', right_on='area_number')
    .assign(dropoff_area=lambda merged: merged['community'])
    .drop(columns=['community', 'area_number'])
)

# Keep a separate copy for the 🧭 Check Your Work sections
df_backup = df.copy()
# YOUR CODE BEGINS
df.head(5)
# YOUR CODE ENDS
| start | trip_seconds | trip_miles | pickup_area | dropoff_area | fare | tip | additional_charges | trip_total | shared_trip_authorized | trips_pooled | pickup_lat | pickup_lon | dropoff_lat | dropoff_lon | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-01-01 | 303 | 1.4 | Garfield Ridge | Clearing | 5.0 | 5.0 | 7.68 | 17.68 | False | 1 | 41.792592 | -87.769615 | 41.779583 | -87.768511 |
| 1 | 2019-01-01 | 697 | 3.0 | Near North Side | Near West Side | 7.5 | 0.0 | 2.50 | 10.00 | False | 1 | 41.892073 | -87.628874 | 41.885300 | -87.642808 |
| 2 | 2019-01-01 | 1598 | 4.7 | Lincoln Park | Loop | 10.0 | 2.0 | 2.50 | 14.50 | False | 1 | 41.922083 | -87.634156 | 41.870607 | -87.622173 |
| 3 | 2019-01-01 | 573 | 0.9 | Near North Side | Near North Side | 5.0 | 0.0 | 2.50 | 7.50 | False | 1 | 41.892042 | -87.631864 | 41.892508 | -87.626215 |
| 4 | 2019-01-01 | 1562 | 2.4 | Near North Side | Near North Side | 10.0 | 0.0 | 2.50 | 12.50 | False | 1 | 41.900221 | -87.629105 | 41.895033 | -87.619711 |
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-01"
did_pass = True
available_points = 2
message = ""

try:
    # No automated checks for this deliverable; the try body is intentionally empty.
    pass
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01'], 'student_score': 2, 'total_available': 2, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
# YOUR CODE BEGINS
df.info()
# YOUR CODE ENDS
<class 'pandas.core.frame.DataFrame'> Int64Index: 1388193 entries, 0 to 1388192 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 start 1388193 non-null datetime64[ns] 1 trip_seconds 1388193 non-null int64 2 trip_miles 1388193 non-null float64 3 pickup_area 1388193 non-null object 4 dropoff_area 1388193 non-null object 5 fare 1388193 non-null float64 6 tip 1388193 non-null float64 7 additional_charges 1388193 non-null float64 8 trip_total 1388193 non-null float64 9 shared_trip_authorized 1388193 non-null bool 10 trips_pooled 1388193 non-null int64 11 pickup_lat 1388193 non-null float64 12 pickup_lon 1388193 non-null float64 13 dropoff_lat 1388193 non-null float64 14 dropoff_lon 1388193 non-null float64 dtypes: bool(1), datetime64[ns](1), float64(9), int64(2), object(2) memory usage: 160.2+ MB
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-02"
did_pass = True
available_points = 2
message = ""

try:
    # No automated checks for this deliverable; the try body is intentionally empty.
    pass
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02'], 'student_score': 4, 'total_available': 4, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Store the number of rows in df to a new variable named num_rows. Store the number of columns in df to a new variable named num_cols. num_rows and num_cols must be ints. Use .shape, not len().
# YOUR CODE BEGINS
# DataFrame.shape is a (rows, columns) tuple; unpack both counts at once.
num_rows, num_cols = df.shape
# YOUR CODE ENDS
print(f'There are {num_rows} rows and {num_cols} columns in the dataset.')
There are 1388193 rows and 15 columns in the dataset.
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-03"
did_pass = True
available_points = 2
message = ""

try:
    # Expected counts come from the backup copy of the dataset.
    tc.assertEqual(num_rows, len(df_backup.index), f'Number of rows should be {len(df_backup.index)}')
    tc.assertEqual(num_cols, len(df_backup.columns), f'Number of columns should be {len(df_backup.columns)}')
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03'], 'student_score': 6, 'total_available': 6, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
The start column contains trip start timestamps. In this part of the case study, you will extract year, month, day of the month, day of the week, hour, and weekday/weekend information into separate columns.
▶️ Run the code below to print the first 3 values of the start column and the data types.
display(df['start'].head(3))
print('=================================================')
print(f"df['start'] column's data type is {df['start'].dtype}.")
0 2019-01-01 1 2019-01-01 2 2019-01-01 Name: start, dtype: datetime64[ns]
================================================= df['start'] column's data type is datetime64[ns].
Using df, extract the year (e.g., 2019, 2020) from the start column and store it to a new column named year.
👆 date_series.dt is an accessor object for datetimelike Series values. You can refer to the documentation here.
# YOUR CODE BEGINS
df['year'] = df['start'].dt.year
# YOUR CODE ENDS
display(df[['start', 'year']].sample(5))
| start | year | |
|---|---|---|
| 491424 | 2019-07-02 11:00:00 | 2019 |
| 9852 | 2019-01-05 13:15:00 | 2019 |
| 1036018 | 2020-01-29 14:15:00 | 2020 |
| 372753 | 2019-05-18 15:15:00 | 2019 |
| 620176 | 2019-08-20 20:00:00 | 2019 |
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-04"
did_pass = True
available_points = 2
message = ""

try:
    # Build the expected column on the backup copy, then compare shapes and values.
    df_backup['year'] = df_backup['start'].dt.year
    tc.assertEqual(df.shape, df_backup.shape, 'Incorrect number of rows and/or columns')

    # Both frames are sorted by `start` and re-indexed before the comparison.
    pd.testing.assert_frame_equal(
        df[['start', 'year']].sort_values('start').reset_index(drop=True),
        df_backup[['start', 'year']].sort_values('start').reset_index(drop=True),
    )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04'], 'student_score': 8, 'total_available': 8, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Extract the following values from the start column in df to new columns. month: 1, 2, ..., 11, 12. day: 1, 2, ..., 28, 29, 30, 31. dayofweek: 0 for Monday, 6 for Sunday. hour: 0, 1, ..., 22, 23.
# YOUR CODE BEGINS
# Reuse a single .dt accessor for every component pulled out of `start`.
start_accessor = df['start'].dt
df['month'] = start_accessor.month
df['day'] = start_accessor.day
df['dayofweek'] = start_accessor.dayofweek
df['hour'] = start_accessor.hour
# YOUR CODE ENDS
display(df[['start', 'month', 'day', 'dayofweek', 'hour']].sample(5))
| start | month | day | dayofweek | hour | |
|---|---|---|---|---|---|
| 681497 | 2019-09-14 11:45:00 | 9 | 14 | 5 | 11 |
| 378421 | 2019-05-20 13:30:00 | 5 | 20 | 0 | 13 |
| 710541 | 2019-09-25 17:00:00 | 9 | 25 | 2 | 17 |
| 1360766 | 2020-11-28 13:15:00 | 11 | 28 | 5 | 13 |
| 522507 | 2019-07-14 14:00:00 | 7 | 14 | 6 | 14 |
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-05"
did_pass = True
available_points = 4
message = ""

try:
    # Base64-encoded reference code; the comparison below relies on it adding
    # the month/day/dayofweek/hour columns to df_backup.
    decoded_code = base64.b64decode(
        b'ZGZfYmFja3VwWydtb250aCddID0gZGZfYmFja3VwWydzd'
        b'GFydCddLmR0Lm1vbnRoCmRmX2JhY2t1cFsnZGF5J10gPSBkZl9iYWNrdXBbJ3N0YXJ0J10uZHQuZG'
        b'F5CmRmX2JhY2t1cFsnZGF5b2Z3ZWVrJ10gPSBkZl9iYWNrdXBbJ3N0YXJ0J10uZHQuZGF5b2Z3ZWV'
        b'rCmRmX2JhY2t1cFsnaG91ciddID0gZGZfYmFja3VwWydzdGFydCddLmR0LmhvdXI='
    )
    # The decoded code is a series of statements, so it is run with exec.
    exec(compile(decoded_code, '<string>', 'exec'))

    tc.assertEqual(df.shape, df_backup.shape, 'Incorrect number of rows and/or columns')
    pd.testing.assert_frame_equal(
        df[['start', 'month', 'day', 'dayofweek', 'hour']].sort_values('start').reset_index(drop=True),
        df_backup[['start', 'month', 'day', 'dayofweek', 'hour']].sort_values('start').reset_index(drop=True),
    )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05'], 'student_score': 12, 'total_available': 12, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Create the weekday_weekend column. Create a new column named weekday_weekend in df. The value of weekday_weekend will either be string 'weekday' or string 'weekend' (case-sensitive) based on the value of the dayofweek column: 'weekday' if dayofweek is less than or equal to 3 (0 == Monday, 1 == Tuesday, 2 == Wednesday, 3 == Thursday); 'weekend' if otherwise (4 == Friday, 5 == Saturday, 6 == Sunday). There are many ways to achieve this task.
The code below creates a new column named cheap_expensive where the value will be string 'cheap' if the price is less than or equal to 10 and 'expensive' if otherwise.
my_dataframe['cheap_expensive'] = np.where(my_dataframe['price'] <= 10, 'cheap', 'expensive')
# YOUR CODE BEGINS
df['weekday_weekend'] = np.where(df['dayofweek'] <= 3, 'weekday', 'weekend')
# YOUR CODE ENDS
display(df[['dayofweek', 'weekday_weekend']].sample(5))
| dayofweek | weekday_weekend | |
|---|---|---|
| 400722 | 2 | weekday |
| 1158969 | 0 | weekday |
| 773554 | 5 | weekend |
| 724987 | 0 | weekday |
| 796975 | 6 | weekend |
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-06"
did_pass = True
available_points = 3
message = ""

try:
    # Base64-encoded reference code; the comparison below relies on it adding
    # the weekday_weekend column to df_backup.
    decoded_code = base64.b64decode(
        b'ZGZfYmFja3VwWyd3ZWVrZGF5X3dlZWtlbmQnXSA9IG5wL'
        b'ndoZXJlKGRmX2JhY2t1cFsnZGF5b2Z3ZWVrJ10gPD0gMywgJ3dlZWtkYXknLCAnd2Vla2VuZCcp'
    )
    # The decoded code is a statement, so it is run with exec.
    exec(compile(decoded_code, '<string>', 'exec'))

    tc.assertEqual(df.shape, df_backup.shape, 'Incorrect number of rows and/or columns')
    pd.testing.assert_frame_equal(
        df[['start', 'dayofweek', 'weekday_weekend']].sort_values('start').reset_index(drop=True),
        df_backup[['start', 'dayofweek', 'weekday_weekend']].sort_values('start').reset_index(drop=True),
    )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06'], 'student_score': 15, 'total_available': 15, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Although the first case of COVID-19 was reported in January 2020 in the United States, people started to take it seriously in March 2020.
How did COVID-19 affect the volume of ridesharing trips? 💨 Let's visualize and compare the monthly number of trips for both 2019 and 2020.
Using df, count the number of trips made in 2019 and store the number to a new variable named num_2019_trips. Using df, count the number of trips made in 2020 and store the number to a new variable named num_2020_trips. For num_2019_trips, retrieve the number of rows where df['year'] is 2019.
# YOUR CODE BEGINS
# Summing a boolean mask counts the rows where the comparison holds.
num_2019_trips = df['year'].eq(2019).sum()
num_2020_trips = df['year'].eq(2020).sum()
# YOUR CODE ENDS
print(f'There were {num_2019_trips} trips in 2019.')
print(f'There were {num_2020_trips} trips in 2020.')
print(f'The number of trips decreased by {(num_2019_trips - num_2020_trips) / num_2019_trips * 100:.1f}%.')
There were 966346 trips in 2019. There were 421847 trips in 2020. The number of trips decreased by 56.3%.
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-07"
did_pass = True
available_points = 3
message = ""

try:
    # Base64-encoded reference code; the assertions below rely on it defining
    # num_2019_trips_check and num_2020_trips_check.
    decoded_code = base64.b64decode(
        b'bnVtXzIwMTlfdHJpcHNfY2hlY2sgPSBkZ'
        b'l9iYWNrdXBbJ3llYXInXS52YWx1ZV9jb3VudHMoKS5sb2NbMjAxOV0KbnVtXzIwMjBf'
        b'dHJpcHNfY2hlY2sgPSBkZl9iYWNrdXBbJ3llYXInXS52YWx1ZV9jb3VudHMoKS5sb2NbMjAyMF0='
    )
    # The decoded code is a series of statements, so it is run with exec.
    exec(compile(decoded_code, '<string>', 'exec'))

    tc.assertEqual(num_2019_trips, num_2019_trips_check,
                   f"Incorrect number of 2019 trips, should be {num_2019_trips_check}")
    tc.assertEqual(num_2020_trips, num_2020_trips_check,
                   f"Incorrect number of 2020 trips, should be {num_2020_trips_check}")
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07'], 'student_score': 18, 'total_available': 18, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Using df, calculate the number of monthly trips and average tip percentages.
# YOUR CODE BEGINS
# One row per (year, month): the trip count plus the tip and fare totals
# needed to derive the tip percentage.
df_monthly = df.groupby(['year', 'month'], as_index=False).agg(
    num_trips=('start', 'count'),
    tip=('tip', 'sum'),
    fare=('fare', 'sum'),
)

# Average tip % = total tips / total fares for the month
df_monthly['tip_pct'] = df_monthly['tip'] / df_monthly['fare']

# The tip and fare totals were only needed to compute tip_pct
df_monthly = df_monthly.drop(columns=['tip', 'fare'])
# YOUR CODE ENDS
display(df_monthly)
| year | month | num_trips | tip_pct | |
|---|---|---|---|---|
| 0 | 2019 | 1 | 77313 | 0.048254 |
| 1 | 2019 | 2 | 77374 | 0.045353 |
| 2 | 2019 | 3 | 89688 | 0.047561 |
| 3 | 2019 | 4 | 79829 | 0.047786 |
| 4 | 2019 | 5 | 84168 | 0.049438 |
| 5 | 2019 | 6 | 80349 | 0.052670 |
| 6 | 2019 | 7 | 79007 | 0.055360 |
| 7 | 2019 | 8 | 81141 | 0.056450 |
| 8 | 2019 | 9 | 76516 | 0.058380 |
| 9 | 2019 | 10 | 82129 | 0.056311 |
| 10 | 2019 | 11 | 79395 | 0.052962 |
| 11 | 2019 | 12 | 79437 | 0.054570 |
| 12 | 2020 | 1 | 76255 | 0.052041 |
| 13 | 2020 | 2 | 78726 | 0.050506 |
| 14 | 2020 | 3 | 45565 | 0.045580 |
| 15 | 2020 | 4 | 12612 | 0.028806 |
| 16 | 2020 | 5 | 15618 | 0.037591 |
| 17 | 2020 | 6 | 22092 | 0.046910 |
| 18 | 2020 | 7 | 28129 | 0.042972 |
| 19 | 2020 | 8 | 29010 | 0.037146 |
| 20 | 2020 | 9 | 30265 | 0.040097 |
| 21 | 2020 | 10 | 33488 | 0.040802 |
| 22 | 2020 | 11 | 24501 | 0.040884 |
| 23 | 2020 | 12 | 25586 | 0.040130 |
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-08"
did_pass = True
available_points = 2
message = ""

try:
    # Base64-encoded reference code; the comparison below relies on it
    # defining df_monthly_check.
    decoded_code = base64.b64decode(
        b'ZGZfbW9udGhseV9jaGVjayA9IGRmX2JhY2t1cC5ncm91'
        b'cGJ5KFsneWVhcicsICdtb250aCddLCBhc19pbmRleD1GYWxzZSkuYWdnKHsKICAgICdzdGFydCc6IC'
        b'djb3VudCcsCiAgICAndGlwJzogJ3N1bScsCiAgICAnZmFyZSc6ICdzdW0nLAp9KS5yZW5hbWUoY29s'
        b'dW1ucz17CiAgICAnc3RhcnQnOiAnbnVtX3RyaXBzJwp9KQoKZGZfbW9udGhseV9jaGVja1sndGlwX3'
        b'BjdCddID0gZGZfbW9udGhseV9jaGVja1sndGlwJ10gLyBkZl9tb250aGx5X2NoZWNrWydmYXJlJ10K'
        b'ZGZfbW9udGhseV9jaGVjay5kcm9wKGNvbHVtbnM9Wyd0aXAnLCAnZmFyZSddLCBpbnBsYWNlPVRydWUp'
    )
    # The decoded code is a series of statements, so it is run with exec.
    exec(compile(decoded_code, '<string>', 'exec'))

    tc.assertEqual(df_monthly.shape, df_monthly_check.shape, 'Incorrect number of rows and/or columns')
    # check_like=True ignores the order of index and columns.
    pd.testing.assert_frame_equal(
        df_monthly.sort_values(['year', 'month']).reset_index(drop=True),
        df_monthly_check.sort_values(['year', 'month']).reset_index(drop=True),
        check_like=True,
    )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08'], 'student_score': 20, 'total_available': 20, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Using df_monthly, create a sunburst chart that shows the proportion of monthly number of trips for both 2019 and 2020. Set both width and height to 600. Store your figure to a variable named fig. Display the figure using fig.show().
Replace my_dataframe, 'column1', 'column2', 'column3', and ...s with your own values from the code below.
fig = px.sunburst(
my_dataframe,
path=['column1', 'column2'],
values='column3',
title='Your Title Here',
width=...,
height=...
)
fig.show()
# YOUR CODE BEGINS
# Sunburst with year as the inner ring and month as the outer ring,
# sized by the number of trips.
fig = px.sunburst(
    df_monthly,
    title='Trips Breakdown by Year and Month',
    values='num_trips',
    path=['year', 'month'],
    height=600,
    width=600,
)
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-09"
did_pass = True
available_points = 3
message = ""

try:
    # Layout and chart-type checks on the student's figure.
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(fig.data[0].type, 'sunburst', 'Must be a sunburst chart')
    tc.assertEqual(fig.layout.width, 600, 'Incorrect width')
    tc.assertEqual(fig.layout.height, 600, 'Incorrect height')

    # Base64-encoded reference code; the comparisons below rely on it
    # defining fig_check.
    decoded_code = base64.b64decode(
        b'ZmlnX2NoZWNrID0gcHguc3VuYnVyc3QoCiAgICBkZ'
        b'l9tb250aGx5X2NoZWNrLAogICAgcGF0aD1bJ3llYXInLCAnbW9udGgnXSwKICAgIHZhbHVlcz0n'
        b'bnVtX3RyaXBzJywKICAgIHRpdGxlPSdUcmlwcyBCcmVha2Rvd24gYnkgWWVhciBhbmQgTW9udGg'
        b'nLAogICAgd2lkdGg9NjAwLAogICAgaGVpZ2h0PTYwMAop'
    )
    # The decoded code is a statement, so it is run with exec.
    exec(compile(decoded_code, '<string>', 'exec'))

    # The sunburst trace must match the reference in labels, parents, and values.
    np.testing.assert_array_equal(
        fig.data[0].labels,
        fig_check.data[0].labels,
        'Label(s) mismatch'
    )
    np.testing.assert_array_equal(
        fig.data[0].parents,
        fig_check.data[0].parents,
        'Parent(s) mismatch'
    )
    np.testing.assert_array_equal(
        fig.data[0].values,
        fig_check.data[0].values,
        'Value(s) mismatch'
    )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09'], 'student_score': 23, 'total_available': 23, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Using df_monthly, create two bar charts side-by-side within the same figure showing the number of monthly trips for 2019 and 2020. Use the 'plotly_dark' theme. Set the width to 1000 and height to 500. Store your figure to a variable named fig. Display the figure using fig.show().
Replace my_dataframe, 'column1', 'column2', 'column3', and ...s with your own values from the code below.
fig = px.bar(
my_dataframe,
title='Your Title Here',
x='column1',
y='column2',
facet_col='column3',
width=...,
height=...,
template='plotly_dark',
color='num_trips',
color_continuous_scale=['White', 'Yellow']
)
fig.show()
# YOUR CODE BEGINS
# One bar-chart facet per year; bars are shaded by trip count on a
# white-to-yellow scale.
fig = px.bar(
    df_monthly,
    x='month',
    y='num_trips',
    facet_col='year',
    color='num_trips',
    color_continuous_scale=['White', 'Yellow'],
    template='plotly_dark',
    title='Monthly Number of Trips in 2019 and 2020',
    width=1000,
    height=500,
)
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-10"
did_pass = True
available_points = 5
message = ""

try:
    # Layout checks on the student's figure.
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(len(fig.data), 2, 'Must use a facet grid to display two bar charts side-by-side')
    tc.assertEqual(fig.layout.width, 1000, 'Incorrect width')
    tc.assertEqual(fig.layout.height, 500, 'Incorrect height')
    tc.assertEqual(fig.layout.template, pio.templates['plotly_dark'], 'Incorrect plotly theme (template)')

    # Base64-encoded reference code; the comparisons below rely on it
    # defining fig_check.
    decoded_code = base64.b64decode(
        b'ZmlnX2NoZWNrID0gcHguYmFyKAogICAgZGZfbW9udGhseV9jaGV'
        b'jaywKICAgIHRpdGxlPSdNb250aGx5IE51bWJlciBvZiBUcmlwcyBpbiAyMDE5IGFuZCAyMDIwJywKICAgIHg9'
        b'J21vbnRoJywKICAgIHk9J251bV90cmlwcycsCiAgICBmYWNldF9jb2w9J3llYXInLAogICAgd2lkdGg9MTAwM'
        b'CwKICAgIGhlaWdodD01MDAsCiAgICB0ZW1wbGF0ZT0ncGxvdGx5X2RhcmsnLAogICAgY29sb3I9J251bV90cm'
        b'lwcycsCiAgICBjb2xvcl9jb250aW51b3VzX3NjYWxlPVsnV2hpdGUnLCAnWWVsbG93J10KKQ=='
    )
    # The decoded code is a statement, so it is run with exec.
    exec(compile(decoded_code, '<string>', 'exec'))

    # Compare the traces pairwise; the trace count of `fig` was asserted above.
    for student_trace, expected_trace in zip(fig.data, fig_check.data):
        tc.assertEqual(student_trace.type, 'bar', 'Must be a bar chart')
        tc.assertEqual(student_trace.orientation, 'v', 'Must be a vertical bar chart')
        np.testing.assert_array_equal(
            student_trace.x,
            expected_trace.x,
            'x-axis value(s) mismatch'
        )
        np.testing.assert_array_equal(
            student_trace.y,
            expected_trace.y,
            'y-axis value(s) mismatch'
        )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10'], 'student_score': 28, 'total_available': 28, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Using df_monthly, create a line plot of monthly number of trips. Use year to create two separate lines (encoded by different colors) for 2019 and 2020. Use the 'plotly_dark' theme. Set the width to 800 and height to 500. Store your figure to a variable named fig. Display the figure using fig.show().
Replace my_dataframe, 'column1', 'column2', 'column3', and ...s with your own values from the code below.
fig = px.line(
my_dataframe,
title='Your Title Here',
x='column1',
y='column2',
color='column3',
template='plotly_dark',
width=...,
height=...
)
fig.show()
# YOUR CODE BEGINS
# One line per year (color encodes the year) across the twelve months.
fig = px.line(
    df_monthly,
    x='month',
    y='num_trips',
    color='year',
    title='Monthly Number of Trips Comparison between 2019 and 2020',
    width=800,
    height=500,
    template='plotly_dark',
)
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
part_name = "deliverable-11"
did_pass = True
available_points = 4
message = ""

try:
    # Layout checks on the student's figure.
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(len(fig.data), df_monthly_check['year'].nunique(), 'Must encode each year with different colors')
    tc.assertEqual(fig.layout.width, 800, 'Incorrect width')
    tc.assertEqual(fig.layout.height, 500, 'Incorrect height')
    tc.assertEqual(fig.layout.template, pio.templates['plotly_dark'], 'Incorrect plotly theme (template)')

    # Base64-encoded reference code; the comparisons below rely on it
    # defining fig_check.
    decoded_code = base64.b64decode(
        b'ZmlnX2NoZWNrID0gcHgubGluZSgKICAgIG'
        b'RmX21vbnRobHlfY2hlY2ssCiAgICB0aXRsZT0nTW9udGhseSBOdW1iZXIgb2YgVHJpcH'
        b'MgQ29tcGFyaXNvbiBiZXR3ZWVuIDIwMTkgYW5kIDIwMjAnLAogICAgeD0nbW9udGgnLA'
        b'ogICAgeT0nbnVtX3RyaXBzJywKICAgIGNvbG9yPSd5ZWFyJywKICAgIHRlbXBsYXRlPS'
        b'dwbG90bHlfZGFyaycsCiAgICB3aWR0aD04MDAsCiAgICBoZWlnaHQ9NTAwCik='
    )
    # The decoded code is a statement, so it is run with exec.
    exec(compile(decoded_code, '<string>', 'exec'))

    # Compare the traces pairwise; the trace count of `fig` was asserted above.
    for student_trace, expected_trace in zip(fig.data, fig_check.data):
        # In plotly, a line plot is a scatter plot with lines connecting the dots
        tc.assertEqual(student_trace.type, 'scatter', 'Must be a line plot')
        # The line color must be checked on the student's trace; checking the
        # reference trace would make this assertion pass for any submission.
        tc.assertIsNotNone(student_trace.line.color, 'Must be a line plot')
        np.testing.assert_array_equal(
            student_trace.x,
            expected_trace.x,
            'x-axis value(s) mismatch'
        )
        np.testing.assert_array_equal(
            student_trace.y,
            expected_trace.y,
            'y-axis value(s) mismatch'
        )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        # Outside the autograder, surface the failure in the notebook.
        raise
finally:
    # Under the autograder, the outcome is always recorded, pass or fail.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11'], 'student_score': 32, 'total_available': 32, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Did passengers tip more on average during the pandemic since they appreciated the drivers providing services during risky times?
Or did the passengers tip less on average since the pandemic has devastated the nation's economy in 2020?
Using df_monthly, create two bar charts side-by-side within the same figure showing the monthly average tip percentages in 2019 and 2020. The tip_pct column in df_monthly contains the monthly average tip percentages. Use the 'plotly_dark' theme. Set the width to 1000 and height to 500. Store your figure to a variable named fig. Display the figure using fig.show().
Replace my_dataframe, 'column1', 'column2', 'column3', and ...s with your own values from the code below.
fig = px.bar(
my_dataframe,
title='Your Title Here',
x='column1',
y='column2',
facet_col='column3',
width=...,
height=...,
template='plotly_dark',
color='tip_pct',
color_continuous_scale=['White', 'GreenYellow']
)
fig.update_layout(yaxis_tickformat='%')
fig.update_layout(yaxis2_tickformat='%')
fig.show()
# YOUR CODE BEGINS
# Bar chart of tip_pct per month, split into one facet column per year and
# colored by tip_pct on a White -> GreenYellow scale.
fig = px.bar(
    df_monthly,
    x='month',
    y='tip_pct',
    facet_col='year',
    color='tip_pct',
    color_continuous_scale=['White', 'GreenYellow'],
    title='Monthly Average Tip % Based on Fares in 2019 and 2020',
    template='plotly_dark',
    width=1000,
    height=500,
)
# Apply the '%' tick format to both yaxis and yaxis2 in a single layout update.
fig.update_layout(yaxis_tickformat='%', yaxis2_tickformat='%')
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Check for deliverable-12: validates the layout of `fig` and compares every
# trace against a reference figure built from the encoded snippet below.
part_name = "deliverable-12"
did_pass = True
available_points = 5
message = ""
try:
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(len(fig.data), 2, 'Must use a facet grid to display two bar charts side-by-side')
    tc.assertEqual(fig.layout.width, 1000, 'Incorrect width')
    tc.assertEqual(fig.layout.height, 500, 'Incorrect height')
    tc.assertEqual(fig.layout.template, pio.templates['plotly_dark'], 'Incorrect plotly theme (template)')
    # Base64-encoded snippet; executing it defines `fig_check`, used below.
    decoded_code = base64.b64decode(b'ZmlnX2NoZWNrID0gcHguYmFyKAogICAgZGZfbW9udGhseV9\
jaGVjaywKICAgIHRpdGxlPSdNb250aGx5IEF2ZXJhZ2UgVGlwICUgQmFzZWQgb24gRmFyZXMgaW4gMjAx\
OSBhbmQgMjAyMCcsCiAgICB4PSdtb250aCcsCiAgICB5PSd0aXBfcGN0JywKICAgIGZhY2V0X2NvbD0ne\
WVhcicsCiAgICB3aWR0aD0xMDAwLAogICAgaGVpZ2h0PTUwMCwKICAgIHRlbXBsYXRlPSdwbG90bHlfZG\
FyaycsCiAgICBjb2xvcj0ndGlwX3BjdCcsCiAgICBjb2xvcl9jb250aW51b3VzX3NjYWxlPVsnV2hpdGU\
nLCAnR3JlZW5ZZWxsb3cnXQop')
    exec(compile(decoded_code, '<string>', 'exec'))
    # Walk the reference traces and compare the trace at the same position.
    for i, expected_trace in enumerate(fig_check.data):
        student_trace = fig.data[i]
        tc.assertEqual(student_trace.type, 'bar', 'Must be a bar chart')
        tc.assertEqual(student_trace.orientation, 'v', 'Must be a vertical bar chart')
        np.testing.assert_array_equal(student_trace.x, expected_trace.x, 'x-axis value(s) mismatch')
        np.testing.assert_array_equal(student_trace.y, expected_trace.y, 'y-axis value(s) mismatch')
except Exception as err:
    # Remember the failure; when not running under the autograder, re-raise
    # so the error is shown directly in the notebook.
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    # The result is recorded only in the autograder environment.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12'], 'student_score': 37, 'total_available': 37, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
df_monthly, create a line plot of monthly average tip percentages.year to create two separate lines (encoded by different colors) for 2019 and 2020.tip_pct column in df_monthly contains the monthly average tip percentages.'plotly_dark' theme.width to 800 and height to 500.fig.fig.show()
Replace my_dataframe, 'column1', 'column2', 'column3', and ...s with your own values from the code below.
fig = px.line(
my_dataframe,
title='Your Title Here',
x='column1',
y='column2',
color='column3',
template='plotly_dark',
width=...,
height=...
)
fig.update_layout(yaxis_tickformat='%')
fig.show()
# YOUR CODE BEGINS
# Line plot of tip_pct per month with one colored line per value of `year`.
fig = px.line(
    df_monthly,
    x='month',
    y='tip_pct',
    color='year',
    title='Monthly Average Tip % Comparison between 2019 and 2020',
    width=800,
    height=500,
    template='plotly_dark',
)
# Show the y-axis ticks using the '%' tick format.
fig.update_layout(yaxis_tickformat='%')
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Check for deliverable-13: validates the layout of `fig` and compares every
# trace against a reference figure built from the encoded snippet below.
part_name = "deliverable-13"
did_pass = True
available_points = 4
message = ""
try:
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(len(fig.data), df_monthly_check['year'].nunique(), 'Must encode each year with different colors')
    tc.assertEqual(fig.layout.width, 800, 'Incorrect width')
    tc.assertEqual(fig.layout.height, 500, 'Incorrect height')
    tc.assertEqual(fig.layout.template, pio.templates['plotly_dark'], 'Incorrect plotly theme (template)')
    # Base64-encoded snippet; executing it defines `fig_check`, used below.
    decoded_code = base64.b64decode(b'ZmlnX2NoZWNrID0gcHgubGluZSgKICAgIGRmX21vbnRob\
HlfY2hlY2ssCiAgICB0aXRsZT0nTW9udGhseSBBdmVyYWdlIFRpcCAlIENvbXBhcmlzb24gYmV0d2Vl\
biAyMDE5IGFuZCAyMDIwJywKICAgIHg9J21vbnRoJywKICAgIHk9J3RpcF9wY3QnLAogICAgY29sb3I\
9J3llYXInLAogICAgdGVtcGxhdGU9J3Bsb3RseV9kYXJrJywKICAgIHdpZHRoPTgwMCwKICAgIGhlaWdodD01MDAKKQ==')
    eval(compile(decoded_code, '<string>', 'exec'))
    for i in range(len(fig_check.data)):
        # In plotly, a line plot is a scatter plot with lines connecting the dots
        tc.assertEqual(fig.data[i].type, 'scatter', 'Must be a line plot')
        # Inspect the submitted figure's trace. Checking the reference figure
        # here would always pass and could never detect a non-line plot.
        tc.assertIsNotNone(fig.data[i].line.color, 'Must be a line plot')
        np.testing.assert_array_equal(
            fig.data[i].x,
            fig_check.data[i].x,
            'x-axis value(s) mismatch'
        )
        np.testing.assert_array_equal(
            fig.data[i].y,
            fig_check.data[i].y,
            'y-axis value(s) mismatch'
        )
except Exception as err:
    # Remember the failure; when not running under the autograder, re-raise
    # so the error is shown directly in the notebook.
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    # The result is recorded only in the autograder environment.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13'], 'student_score': 41, 'total_available': 41, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Every July 4th, massive crowds gather around places (or boats) to view the fireworks. How did July 4th become the "national fireworks day"? July 4, 1776 is considered to be the birth of the United States of America as an independent nation. The Continental Congress approved the final wording of the Declaration of Independence on July 4, 1776. The first-ever recorded July 4th fireworks celebration was held in Philadelphia on July 4, 1777. Since then, there hasn't been an Independence Day without fireworks. 💥💥
In this part, you will find trips started on July 4th between 5-6 PM and create different scatter plots based on those trips.
df, filter rows where:month is 7 (July) and,day is 4 (4th) and,hour is 17 (All trips started between 5-6 pm)df_july_fourth.df should remain unaltered after running your code.The code below filters rows where column is 10, column2 is 20, and column3 is 30. The filtered DataFrame is stored to a new variable named my_filtered.
my_filtered = my_df[(my_df['column1'] == 10) & (my_df['column2'] == 20) & (my_df['column3'] == 30)]
# YOUR CODE BEGINS
# Keep only rows whose month is 7, day is 4 and hour is 17; `df` itself is
# not modified because the selection is assigned to a new name.
df_july_fourth = df.loc[
    df['month'].eq(7) & df['day'].eq(4) & df['hour'].eq(17)
]
# YOUR CODE ENDS
display(df_july_fourth.sample(3))
| start | trip_seconds | trip_miles | pickup_area | dropoff_area | fare | tip | additional_charges | trip_total | shared_trip_authorized | trips_pooled | pickup_lat | pickup_lon | dropoff_lat | dropoff_lon | year | month | day | dayofweek | hour | weekday_weekend | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 497046 | 2019-07-04 17:30:00 | 1259 | 7.2 | Near South Side | Lake View | 12.5 | 0.0 | 2.55 | 15.05 | False | 1 | 41.859350 | -87.617358 | 41.946295 | -87.654298 | 2019 | 7 | 4 | 3 | 17 | weekday |
| 496959 | 2019-07-04 17:00:00 | 442 | 2.2 | Loop | Armour Square | 5.0 | 0.0 | 2.55 | 7.55 | False | 1 | 41.880994 | -87.632746 | 41.851018 | -87.635092 | 2019 | 7 | 4 | 3 | 17 | weekday |
| 497077 | 2019-07-04 17:45:00 | 384 | 1.6 | Loop | Loop | 5.0 | 0.0 | 2.55 | 7.55 | False | 1 | 41.880994 | -87.632746 | 41.880994 | -87.632746 | 2019 | 7 | 4 | 3 | 17 | weekday |
# DO NOT CHANGE THE CODE IN THIS CELL
# Check for deliverable-14: compares `df_july_fourth` with a reference
# DataFrame produced by the encoded snippet below.
part_name = "deliverable-14"
did_pass = True
available_points = 3
message = ""
try:
    # Base64-encoded snippet; executing it defines `df_july_fourth_check`.
    decoded_code = base64.b64decode(b'ZGZfanVseV9mb3VydGhfY2hlY2sgPSBkZl9iYWNrdXBbKGRmX2JhY2t1cFs\
nbW9udGgnXSA9PSA3KSAmIChkZl9iYWNrdXBbJ2RheSddID09IDQpICYgKGRmX2JhY2t1cFsnaG91ciddID09IDE3KV0=')
    exec(compile(decoded_code, '<string>', 'exec'))
    tc.assertEqual(df_july_fourth.shape, df_july_fourth_check.shape, 'Incorrect number of rows and/or columns')
    # Sort both frames by all of their columns and drop the index so the
    # comparison does not depend on row order or on the original index labels.
    pd.testing.assert_frame_equal(
        df_july_fourth.sort_values(by=df_july_fourth.columns.tolist()).reset_index(drop=True),
        df_july_fourth_check.sort_values(by=df_july_fourth_check.columns.tolist()).reset_index(drop=True),
        check_like=True,
    )
except Exception as err:
    # Remember the failure; when not running under the autograder, re-raise
    # so the error is shown directly in the notebook.
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    # The result is recorded only in the autograder environment.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14'], 'student_score': 44, 'total_available': 44, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
df_july_fourth, create two scatter plots for each year side-by-side within the same figure with the following axes.x: trip_seconds,y: trip_miles,trip_total column to differentiate the sizes of points.shared_trip_authorized column to differentiate the colors of points.'plotly_dark' theme.width to 1200 and height to 500.fig.fig.show()
Replace my_dataframe, 'column1', 'column2', 'column3', 'column4', 'column5', and ...s with your own values from the code below.
fig = px.scatter(
my_dataframe,
title='Your Title Here',
x='column1',
y='column2',
size='column3',
facet_col='column4',
color='column5',
template='plotly_dark',
width=...,
height=...,
)
fig.show()
# YOUR CODE BEGINS
# Scatter of trip_seconds vs trip_miles, one facet column per year; point
# size comes from trip_total and point color from shared_trip_authorized.
fig = px.scatter(
    df_july_fourth,
    x='trip_seconds',
    y='trip_miles',
    facet_col='year',
    size='trip_total',
    color='shared_trip_authorized',
    title='Trip Seconds vs Trip Miles with July 4th 5-6 PM Trips',
    width=1200,
    height=500,
    template='plotly_dark',
)
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Check for deliverable-15: validates the layout of `fig` and compares every
# trace against a reference figure built from the encoded snippet below.
part_name = "deliverable-15"
did_pass = True
available_points = 5
message = ""
try:
    # Base64-encoded snippet; executing it defines `fig_check`, used below.
    decoded_code = base64.b64decode(b'ZmlnX2NoZWNrID0gcHguc2NhdHRlcigKICAgIGRmX2p1bHlf\
Zm91cnRoX2NoZWNrLAogICAgdGl0bGU9J1RyaXAgU2Vjb25kcyB2cyBUcmlwIE1pbGVzIHdpdGggSnVseS\
A0dGggNS02IFBNIFRyaXBzJywKICAgIHg9J3RyaXBfc2Vjb25kcycsCiAgICB5PSd0cmlwX21pbGVzJywK\
ICAgIHNpemU9J3RyaXBfdG90YWwnLAogICAgZmFjZXRfY29sPSd5ZWFyJywKICAgIGNvbG9yPSdzaGFyZW\
RfdHJpcF9hdXRob3JpemVkJywKICAgIHRlbXBsYXRlPSdwbG90bHlfZGFyaycsCiAgICB3aWR0aD0xMjAw\
LAogICAgaGVpZ2h0PTUwMCwKKQ==')
    exec(compile(decoded_code, '<string>', 'exec'))
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(len(fig.data), len(fig_check.data),
                   'Must use a facet grid to display two scatter plots side-by-side with color-encoding')
    tc.assertEqual(fig.layout.width, 1200, 'Incorrect width')
    tc.assertEqual(fig.layout.height, 500, 'Incorrect height')
    tc.assertEqual(fig.layout.template, pio.templates['plotly_dark'], 'Incorrect plotly theme (template)')
    # Both figures were just asserted to hold the same number of traces, so
    # pairing them positionally visits every reference trace.
    for student_trace, expected_trace in zip(fig.data, fig_check.data):
        tc.assertEqual(student_trace.type, 'scatter', 'Must be a scatter plot')
        np.testing.assert_array_equal(student_trace.x, expected_trace.x, 'x-axis value(s) mismatch')
        np.testing.assert_array_equal(student_trace.y, expected_trace.y, 'y-axis value(s) mismatch')
except Exception as err:
    # Remember the failure; when not running under the autograder, re-raise
    # so the error is shown directly in the notebook.
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    # The result is recorded only in the autograder environment.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15'], 'student_score': 49, 'total_available': 49, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
trip_seconds and trip_miles.shared_trip_authorized == True) in 2020. df_july_fourth, create a 3D scatter plot of trips in 2019 using the following axes.x: trip_seconds,y: trip_miles,z: trip_totalshared_trip_authorized column to differentiate the colors of points.'plotly_dark' theme.width and height to 800.fig.fig.show()
Replace 'column1', 'column2', 'column3', 'column4', and ...s with your own values from the code below.
fig = px.scatter_3d(
df_july_fourth[df_july_fourth['year'] == 2019],
title='Your Title Here',
x='column1',
y='column2',
z='column3',
color='column4',
template='plotly_dark',
width=...,
height=...
)
fig.show()
# YOUR CODE BEGINS
# 3D scatter restricted to rows whose year is 2019; points are colored by
# shared_trip_authorized.
fig = px.scatter_3d(
    df_july_fourth[df_july_fourth['year'] == 2019],
    x='trip_seconds',
    y='trip_miles',
    z='trip_total',
    color='shared_trip_authorized',
    title='Trip Seconds vs Trip Miles vs Trip Total with July 4th, 2019 5-6 PM Trips',
    width=800,
    height=800,
    template='plotly_dark',
)
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Check for deliverable-16: validates the layout of `fig` and compares every
# trace against a reference figure built from the encoded snippet below.
part_name = "deliverable-16"
did_pass = True
available_points = 5
message = ""
try:
    # Base64-encoded snippet; executing it defines `fig_check`, used below.
    decoded_code = base64.b64decode(b'ZmlnX2NoZWNrID0gcHguc2NhdHRl\
cl8zZCgKICAgIGRmX2p1bHlfZm91cnRoX2NoZWNrW2RmX2p1bHlfZm91cnRoX2\
NoZWNrWyd5ZWFyJ10gPT0gMjAxOV0sCiAgICB0aXRsZT0nVHJpcCBTZWNvbmRz\
IHZzIFRyaXAgTWlsZXMgdnMgVHJpcCBUb3RhbCB3aXRoIEp1bHkgNHRoLCAyMD\
E5IDUtNiBQTSBUcmlwcycsCiAgICB4PSd0cmlwX3NlY29uZHMnLAogICAgeT0n\
dHJpcF9taWxlcycsCiAgICB6PSd0cmlwX3RvdGFsJywKICAgIGNvbG9yPSdzaG\
FyZWRfdHJpcF9hdXRob3JpemVkJywKICAgIHRlbXBsYXRlPSdwbG90bHlfZGFy\
aycsCiAgICB3aWR0aD04MDAsCiAgICBoZWlnaHQ9ODAwCik=')
    exec(compile(decoded_code, '<string>', 'exec'))
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(len(fig.data), len(fig_check.data), "Check whether you've supplied the color parameter to px.scatter_3d()")
    tc.assertEqual(fig.layout.width, 800, 'Incorrect width')
    tc.assertEqual(fig.layout.height, 800, 'Incorrect height')
    tc.assertEqual(fig.layout.template, pio.templates['plotly_dark'], 'Incorrect plotly theme (template)')
    # Both figures were just asserted to hold the same number of traces, so
    # pairing them positionally visits every reference trace.
    for student_trace, expected_trace in zip(fig.data, fig_check.data):
        tc.assertEqual(student_trace.type, 'scatter3d', 'Must be a 3D scatter plot')
        np.testing.assert_array_equal(student_trace.x, expected_trace.x, 'x-axis value(s) mismatch')
        np.testing.assert_array_equal(student_trace.y, expected_trace.y, 'y-axis value(s) mismatch')
        np.testing.assert_array_equal(student_trace.z, expected_trace.z, 'z-axis value(s) mismatch')
except Exception as err:
    # Remember the failure; when not running under the autograder, re-raise
    # so the error is shown directly in the notebook.
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    # The result is recorded only in the autograder environment.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16'], 'student_score': 54, 'total_available': 54, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
▶️ Run the code cell below to repeat the previous deliverable with trips in 2020.
# DO NOT CHANGE THE CODE BELOW
# Same 3D scatter as the previous deliverable, restricted to rows whose
# year is 2020.
fig = px.scatter_3d(
    df_july_fourth[df_july_fourth['year'] == 2020],
    x='trip_seconds',
    y='trip_miles',
    z='trip_total',
    color='shared_trip_authorized',
    title='Trip Seconds vs Trip Miles vs Trip Total with July 4th, 2020 5-6 PM Trips',
    width=800,
    height=800,
    template='plotly_dark',
)
fig.show()
trip_seconds, trip_miles, and trip_total.In this part, you will find the top 20 pickup areas and analyze the trips originating from those areas.
# YOUR CODE BEGINS
# value_counts() orders areas from most to least frequent, so the first 20
# index labels are the 20 most common pickup areas.
top_20_pickup_areas = df['pickup_area'].value_counts().index[:20].tolist()
# YOUR CODE ENDS
print(top_20_pickup_areas)
['Near North Side', 'Loop', 'Near West Side', 'Lake View', 'West Town', 'Lincoln Park', 'Logan Square', 'Uptown', 'Ohare', 'Near South Side', 'Edgewater', 'Hyde Park', 'Lower West Side', 'North Center', 'Austin', 'Avondale', 'Lincoln Square', 'Rogers Park', 'Irving Park', 'South Shore']
# DO NOT CHANGE THE CODE IN THIS CELL
# Check for deliverable-17: compares `top_20_pickup_areas` with a reference
# list as sets, so the order of the areas does not matter.
part_name = "deliverable-17"
did_pass = True
available_points = 2
message = ""
try:
    # Base64-encoded snippet; executing it defines `top_20_pickup_areas_check`.
    decoded_code = base64.b64decode(b'dG9wXzIwX3BpY2t1cF9hcmVhc19jaGVjayA9IGRmX2JhY\
2t1cFsncGlja3VwX2FyZWEnXS52YWx1ZV9jb3VudHMoKS5oZWFkKDIwKS5pbmRleC50b2xpc3QoKQ==')
    exec(compile(decoded_code, '<string>', 'exec'))
    tc.assertEqual(set(top_20_pickup_areas), set(top_20_pickup_areas_check), 'Incorrect pickup areas')
except Exception as err:
    # Remember the failure; when not running under the autograder, re-raise
    # so the error is shown directly in the notebook.
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    # The result is recorded only in the autograder environment.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16', 'deliverable-17'], 'student_score': 56, 'total_available': 56, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-17': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
# YOUR CODE BEGINS
# Keep only trips whose pickup_area is one of the top-20 areas.
df_filtered = df.loc[df['pickup_area'].isin(top_20_pickup_areas)]
# YOUR CODE ENDS
display(df_filtered.head(3))
print(f'There are {df_filtered.shape[0]} rows and {df_filtered.shape[1]} columns in the filtered DataFrame')
| start | trip_seconds | trip_miles | pickup_area | dropoff_area | fare | tip | additional_charges | trip_total | shared_trip_authorized | trips_pooled | pickup_lat | pickup_lon | dropoff_lat | dropoff_lon | year | month | day | dayofweek | hour | weekday_weekend | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 2019-01-01 | 697 | 3.0 | Near North Side | Near West Side | 7.5 | 0.0 | 2.5 | 10.0 | False | 1 | 41.892073 | -87.628874 | 41.885300 | -87.642808 | 2019 | 1 | 1 | 1 | 0 | weekday |
| 2 | 2019-01-01 | 1598 | 4.7 | Lincoln Park | Loop | 10.0 | 2.0 | 2.5 | 14.5 | False | 1 | 41.922083 | -87.634156 | 41.870607 | -87.622173 | 2019 | 1 | 1 | 1 | 0 | weekday |
| 3 | 2019-01-01 | 573 | 0.9 | Near North Side | Near North Side | 5.0 | 0.0 | 2.5 | 7.5 | False | 1 | 41.892042 | -87.631864 | 41.892508 | -87.626215 | 2019 | 1 | 1 | 1 | 0 | weekday |
There are 1084442 rows and 21 columns in the filtered DataFrame
# DO NOT CHANGE THE CODE IN THIS CELL
# Check for deliverable-18: only the shape of `df_filtered` is compared with
# the shape of the reference DataFrame.
part_name = "deliverable-18"
did_pass = True
available_points = 3
message = ""
try:
    # Base64-encoded snippet; executing it defines `df_filtered_check`.
    decoded_code = base64.b64decode(b'ZGZfZmlsdGVyZWRfY2hlY2sgPSBkZl9iYWNrdXBbKGR\
mX2JhY2t1cFsncGlja3VwX2FyZWEnXS5pc2luKHRvcF8yMF9waWNrdXBfYXJlYXNfY2hlY2spKV0=')
    exec(compile(decoded_code, '<string>', 'exec'))
    tc.assertEqual(df_filtered.shape, df_filtered_check.shape, 'Incorrect number of rows and/or columns')
except Exception as err:
    # Remember the failure; when not running under the autograder, re-raise
    # so the error is shown directly in the notebook.
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    # The result is recorded only in the autograder environment.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16', 'deliverable-17', 'deliverable-18'], 'student_score': 59, 'total_available': 59, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-17': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-18': {'available_points': 3, 'points': 3, 'did_pass': 
True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
# YOUR CODE BEGINS
# One row per pickup_area: num_trips counts the `start` values in the group
# and trip_total is the group's mean trip_total.
df_by_pickup_area = df_filtered.groupby('pickup_area', as_index=False).agg(
    num_trips=('start', 'count'),
    trip_total=('trip_total', 'mean'),
)
# YOUR CODE ENDS
display(df_by_pickup_area.head(5))
| pickup_area | num_trips | trip_total | |
|---|---|---|---|
| 0 | Austin | 16851 | 13.246273 |
| 1 | Avondale | 16425 | 12.623803 |
| 2 | Edgewater | 21689 | 13.867470 |
| 3 | Hyde Park | 21303 | 14.523999 |
| 4 | Irving Park | 14570 | 13.326660 |
# DO NOT CHANGE THE CODE IN THIS CELL
# Check for deliverable-19: compares `df_by_pickup_area` with a reference
# DataFrame produced by the encoded snippet below.
part_name = "deliverable-19"
did_pass = True
available_points = 2
message = ""
try:
    # Base64-encoded snippet; executing it defines `df_by_pickup_area_check`.
    decoded_code = base64.b64decode(b'ZGZfYnlfcGlja3VwX2FyZWFfY2hlY2sgPSBkZl9maWx0Z\
XJlZF9jaGVjay5ncm91cGJ5KAogICAgJ3BpY2t1cF9hcmVhJywgYXNfaW5kZXg9RmFsc2UKKS5hZ2co\
ewogICAgJ3N0YXJ0JzogJ2NvdW50JywKICAgICd0cmlwX3RvdGFsJzogJ21lYW4nLAp9KS5yZW5hbWU\
oY29sdW1ucz17CiAgICAnc3RhcnQnOiAnbnVtX3RyaXBzJwp9KQ==')
    exec(compile(decoded_code, '<string>', 'exec'))
    tc.assertEqual(
        df_by_pickup_area.shape,
        df_by_pickup_area_check.shape,
        'Incorrect number of rows and/or columns',
    )
    # Sort both frames by all of their columns and drop the index so the
    # comparison does not depend on row order or on the original index labels.
    pd.testing.assert_frame_equal(
        df_by_pickup_area.sort_values(by=df_by_pickup_area.columns.tolist()).reset_index(drop=True),
        df_by_pickup_area_check.sort_values(by=df_by_pickup_area_check.columns.tolist()).reset_index(drop=True),
        check_like=True,
    )
except Exception as err:
    # Remember the failure; when not running under the autograder, re-raise
    # so the error is shown directly in the notebook.
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    # The result is recorded only in the autograder environment.
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16', 'deliverable-17', 'deliverable-18', 'deliverable-19'], 'student_score': 61, 'total_available': 61, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-17': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-18': {'available_points': 3, 
'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-19': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
df_by_pickup_area, create a horizontal bar chart displaying the number of trips by each pickup area.800 and do not supply a width (by default, a Plotly figure will expand to fit the window if the width parameter is omitted).plotly_white template.fig.fig.show()
Replace my_dataframe, 'column1', 'column2', and ...s with your own values from the code below.
fig = px.bar(
my_dataframe,
title='Your Title Here',
x='column1',
y='column2',
color='column1',
color_continuous_scale='emrld',
text='column1',
template='plotly_white',
height=...
)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_yaxes(categoryorder='total ascending')
fig.show()
# YOUR CODE BEGINS
# Horizontal bar chart of trip counts, one bar per pickup area.
fig = px.bar(
    data_frame=df_by_pickup_area,
    x='num_trips',
    y='pickup_area',
    # The trip count drives both the bar colour and the bar label.
    color='num_trips',
    color_continuous_scale='emrld',
    text='num_trips',
    title='Number of Trips by Pickup Area',
    template='plotly_white',
    height=800,
)
# Put each bar's label outside the bar, rendered with the '.2s' format.
fig.update_traces(textposition='outside', texttemplate='%{text:.2s}')
# Sort the pickup areas on the y-axis by their totals.
fig.update_yaxes(categoryorder='total ascending')
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Autograder check for deliverable-20 (trips-by-pickup-area bar chart).
# Outside the autograder any failure is re-raised so the student sees it;
# inside the autograder the outcome is reported through record_part().
part_name = "deliverable-20"
did_pass = True
available_points = 5
message = ""

try:
    # Layout requirements stated in the task description.
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(fig.layout.height, 800, 'Incorrect height')
    tc.assertEqual(fig.layout.template, pio.templates['plotly_white'], 'Incorrect plotly theme (template)')

    # Base64-encoded reference solution; running it binds `fig_check`
    # (built from `df_by_pickup_area_check`).
    decoded_code = base64.b64decode(
        b'ZmlnX2NoZWNrID0gcHguYmFyKAog'
        b'ICAgZGZfYnlfcGlja3VwX2FyZWFfY2hlY2ssCiAgICB0aXRsZT0nTnVtYmVyIG'
        b'9mIFRyaXBzIGJ5IFBpY2t1cCBBcmVhJywKICAgIHg9J251bV90cmlwcycsCiAg'
        b'ICB5PSdwaWNrdXBfYXJlYScsCiAgICBjb2xvcj0nbnVtX3RyaXBzJywKICAgIG'
        b'NvbG9yX2NvbnRpbnVvdXNfc2NhbGU9J2VtcmxkJywKICAgIHRleHQ9J251bV90'
        b'cmlwcycsCiAgICB0ZW1wbGF0ZT0ncGxvdGx5X3doaXRlJywKICAgIGhlaWdodD04MDAKKQ=='
    )
    eval(compile(decoded_code, '<string>', 'exec'))

    # The failure text names the chart type that is actually required.
    tc.assertEqual(fig.data[0].type, 'bar', 'Must be a bar chart')

    # The plotted data must match the reference figure exactly.
    np.testing.assert_array_equal(
        fig.data[0].x,
        fig_check.data[0].x,
        'x-axis value(s) mismatch'
    )
    np.testing.assert_array_equal(
        fig.data[0].y,
        fig_check.data[0].y,
        'y-axis value(s) mismatch'
    )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16', 'deliverable-17', 'deliverable-18', 'deliverable-19', 'deliverable-20'], 'student_score': 66, 'total_available': 66, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-17': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-18': 
{'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-19': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-20': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Using `df_by_pickup_area`, create a horizontal bar chart displaying the average trip total by each pickup area.
- Set the height to `800` and do not supply a width (by default, a Plotly figure will expand to fit the window if the width parameter is omitted).
- Use the `plotly_white` template.
- Store the figure in a variable named `fig`.
- Display the figure using `fig.show()`.
Replace my_dataframe, 'column1', 'column2', and ...s with your own values from the code below.
fig = px.bar(
my_dataframe,
title='Your Title Here',
x='column1',
y='column2',
text='column1',
template='plotly_white',
height=...
)
fig.update_traces(texttemplate='$%{text:.1f}', textposition='outside')
fig.update_yaxes(categoryorder='total ascending')
fig.show()
# YOUR CODE BEGINS
# Horizontal bar chart of the average trip total, one bar per pickup area.
fig = px.bar(
    data_frame=df_by_pickup_area,
    x='trip_total',
    y='pickup_area',
    text='trip_total',
    title='Average Trip Total ($) by Pickup Area',
    template='plotly_white',
    height=800,
)
# Put each bar's label outside the bar, as a dollar amount with one decimal.
fig.update_traces(textposition='outside', texttemplate='$%{text:.1f}')
# Sort the pickup areas on the y-axis by their totals.
fig.update_yaxes(categoryorder='total ascending')
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Autograder check for deliverable-21 (average-trip-total bar chart).
# Outside the autograder any failure is re-raised so the student sees it;
# inside the autograder the outcome is reported through record_part().
part_name = "deliverable-21"
did_pass = True
available_points = 5
message = ""

try:
    # Layout requirements stated in the task description.
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(fig.layout.height, 800, 'Incorrect height')
    tc.assertEqual(fig.layout.template, pio.templates['plotly_white'], 'Incorrect plotly theme (template)')

    # Base64-encoded reference solution; running it binds `fig_check`
    # (built from `df_by_pickup_area_check`).
    decoded_code = base64.b64decode(
        b'ZmlnX2NoZWNrID0gcHguYmFyKAogICAg'
        b'ZGZfYnlfcGlja3VwX2FyZWFfY2hlY2ssCiAgICB0aXRsZT0nQXZlcmFnZSBUcmlwIF'
        b'RvdGFsICgkKSBieSBQaWNrdXAgQXJlYScsCiAgICB4PSd0cmlwX3RvdGFsJywKICAg'
        b'IHk9J3BpY2t1cF9hcmVhJywKICAgIHRleHQ9J3RyaXBfdG90YWwnLAogICAgdGVtcG'
        b'xhdGU9J3Bsb3RseV93aGl0ZScsCiAgICBoZWlnaHQ9ODAwCik='
    )
    eval(compile(decoded_code, '<string>', 'exec'))

    # The failure text names the chart type that is actually required.
    tc.assertEqual(fig.data[0].type, 'bar', 'Must be a bar chart')

    # The plotted data must match the reference figure exactly.
    np.testing.assert_array_equal(
        fig.data[0].x,
        fig_check.data[0].x,
        'x-axis value(s) mismatch'
    )
    np.testing.assert_array_equal(
        fig.data[0].y,
        fig_check.data[0].y,
        'y-axis value(s) mismatch'
    )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16', 'deliverable-17', 'deliverable-18', 'deliverable-19', 'deliverable-20', 'deliverable-21'], 'student_score': 71, 'total_available': 71, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-17': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 
'deliverable-18': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-19': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-20': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-21': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
Using `df_by_pickup_area`, create a treemap of pickup areas.
- Store the figure in a variable named `fig`.
- Display the figure using `fig.show()`.
Replace my_dataframe, 'column1', and 'column2' with your own values from the code below.
fig = px.treemap(
my_dataframe,
title='Pickup Area Breakdown',
path=['column1'],
values='column2',
height=600
)
fig.show()
# YOUR CODE BEGINS
# Treemap with one tile per pickup area, sized by its number of trips.
fig = px.treemap(
    data_frame=df_by_pickup_area,
    path=['pickup_area'],
    values='num_trips',
    title='Pickup Area Breakdown',
    height=600,
)
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Autograder check for deliverable-22 (pickup-area treemap).
# Outside the autograder any failure is re-raised so the student sees it;
# inside the autograder the outcome is reported through record_part().
part_name = "deliverable-22"
available_points = 5
did_pass = True
message = ""

try:
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')

    # Base64-encoded reference solution; running it binds `fig_check`
    # (built from `df_by_pickup_area_check`).
    decoded_code = base64.b64decode(
        b'ZmlnX2NoZWNrID0gcHgudHJlZW1hcCg'
        b'KICAgIGRmX2J5X3BpY2t1cF9hcmVhX2NoZWNrLAogICAgdGl0bGU9J1BpY2t1cCBB'
        b'cmVhIEJyZWFrZG93bicsCiAgICBwYXRoPVsncGlja3VwX2FyZWEnXSwKICAgIHZhb'
        b'HVlcz0nbnVtX3RyaXBzJywKICAgIGhlaWdodD02MDAKKQ=='
    )
    eval(compile(decoded_code, '<string>', 'exec'))

    tc.assertEqual(fig.data[0].type, 'treemap', 'Must be a treemap')

    # Compare each trace attribute against the reference, in this order.
    for trace_attr, mismatch_note in (
        ('labels', 'Label value(s) mismatch'),
        ('parents', 'Parent value(s) mismatch'),
        ('values', 'Size value(s) mismatch'),
    ):
        np.testing.assert_array_equal(
            getattr(fig.data[0], trace_attr),
            getattr(fig_check.data[0], trace_attr),
            mismatch_note
        )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16', 'deliverable-17', 'deliverable-18', 'deliverable-19', 'deliverable-20', 'deliverable-21', 'deliverable-22'], 'student_score': 76, 'total_available': 76, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-17': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': 
''}, 'deliverable-18': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-19': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-20': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-21': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-22': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
In this part, you will add a new dimension (weekday/weekend) to the top 20 pickup areas.
▶️ Run the code cell below to create the number of trips and average trip totals by pickup area and weekday/weekend classification.
# DO NOT CHANGE THE CODE BELOW
# One row per (pickup_area, weekday_weekend) pair, holding the number of
# trips (count of `start`) and the mean `trip_total` for that pair.
df_by_pickup_area_weekday_weekend = (
    df_filtered
    .groupby(['pickup_area', 'weekday_weekend'], as_index=False)
    .agg(
        num_trips=('start', 'count'),
        trip_total=('trip_total', 'mean'),
    )
)

# Preview the first five rows.
display(df_by_pickup_area_weekday_weekend.head())
| | pickup_area | weekday_weekend | num_trips | trip_total |
|---|---|---|---|---|
| 0 | Austin | weekday | 9064 | 13.213864 |
| 1 | Austin | weekend | 7787 | 13.283996 |
| 2 | Avondale | weekday | 7942 | 12.617546 |
| 3 | Avondale | weekend | 8483 | 12.629662 |
| 4 | Edgewater | weekday | 10745 | 14.131752 |
# YOUR CODE BEGINS
# Sunburst with pickup area as the first path level and weekday/weekend
# as the second, sized by number of trips.
fig = px.sunburst(
    data_frame=df_by_pickup_area_weekday_weekend,
    path=['pickup_area', 'weekday_weekend'],
    values='num_trips',
    title='Pickup Area Breakdown (Weekday/Weekend)',
    height=800,
    width=800,
)
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Autograder check for deliverable-23 (weekday/weekend sunburst).
# Outside the autograder any failure is re-raised so the student sees it;
# inside the autograder the outcome is reported through record_part().
part_name = "deliverable-23"
available_points = 2
did_pass = True
message = ""

try:
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(fig.data[0].type, 'sunburst', 'Must be a sunburst chart')
    tc.assertEqual(fig.layout.width, 800, 'Incorrect width')
    tc.assertEqual(fig.layout.height, 800, 'Incorrect height')

    # Base64-encoded reference solution; running it binds `fig_check`
    # (built from `df_by_pickup_area_weekday_weekend`).
    decoded_code = base64.b64decode(
        b'ZmlnX2NoZWNrID0gcHguc3VuYnVyc3'
        b'QoCiAgICBkZl9ieV9waWNrdXBfYXJlYV93ZWVrZGF5X3dlZWtlbmQsCiAgICBwYX'
        b'RoPVsncGlja3VwX2FyZWEnLCAnd2Vla2RheV93ZWVrZW5kJ10sCiAgICB2YWx1ZX'
        b'M9J251bV90cmlwcycsCiAgICB0aXRsZT0nUGlja3VwIEFyZWEgQnJlYWtkb3duIC'
        b'hXZWVrZGF5L1dlZWtlbmQpJywKICAgIHdpZHRoPTgwMCwKICAgIGhlaWdodD04MDAKKQ=='
    )
    eval(compile(decoded_code, '<string>', 'exec'))

    # Compare each trace attribute against the reference, in this order.
    for trace_attr, mismatch_note in (
        ('labels', 'Label(s) mismatch'),
        ('parents', 'Parent(s) mismatch'),
        ('values', 'Value(s) mismatch'),
    ):
        np.testing.assert_array_equal(
            getattr(fig.data[0], trace_attr),
            getattr(fig_check.data[0], trace_attr),
            mismatch_note
        )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16', 'deliverable-17', 'deliverable-18', 'deliverable-19', 'deliverable-20', 'deliverable-21', 'deliverable-22', 'deliverable-23'], 'student_score': 78, 'total_available': 78, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-17': {'available_points': 2, 'points': 2, 'did_pass': 
True, 'message': ''}, 'deliverable-18': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-19': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-20': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-21': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-22': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-23': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
# YOUR CODE BEGINS
# Treemap with pickup area as the first path level and weekday/weekend
# as the second, sized by number of trips.
fig = px.treemap(
    data_frame=df_by_pickup_area_weekday_weekend,
    path=['pickup_area', 'weekday_weekend'],
    values='num_trips',
    title='Pickup Area Breakdown (Weekday/Weekend)',
    height=600,
)
fig.show()
# YOUR CODE ENDS
# DO NOT CHANGE THE CODE IN THIS CELL
# Autograder check for deliverable-24 (weekday/weekend treemap).
# Outside the autograder any failure is re-raised so the student sees it;
# inside the autograder the outcome is reported through record_part().
part_name = "deliverable-24"
did_pass = True
available_points = 2
message = ""

try:
    tc.assertIsNotNone(fig.layout.title.text, 'Missing figure title')
    tc.assertEqual(fig.data[0].type, 'treemap', 'Must be a treemap chart')
    tc.assertEqual(fig.layout.height, 600, 'Incorrect height')

    # Base64-encoded reference solution. It must bind `fig_check`, not `fig`:
    # assigning to `fig` would replace the student's figure before the
    # comparisons below and leave `fig_check` pointing at whatever an earlier
    # check cell created, so the data checks could never fail.
    decoded_code = base64.b64decode(
        b'ZmlnX2NoZWNrID0gcHgudHJlZW1hcCgKICAgIGRm'
        b'X2J5X3BpY2t1cF9hcmVhX3dlZWtkYXlfd2Vla2VuZCwKICAgIHBhdGg9WydwaWNrdX'
        b'BfYXJlYScsICd3ZWVrZGF5X3dlZWtlbmQnXSwKICAgIHZhbHVlcz0nbnVtX3RyaXBz'
        b'JywKICAgIHRpdGxlPSdQaWNrdXAgQXJlYSBCcmVha2Rvd24gKFdlZWtkYXkvV2Vla2'
        b'VuZCknLAogICAgaGVpZ2h0PTYwMAop'
    )
    eval(compile(decoded_code, '<string>', 'exec'))

    # The student's trace must match the reference treemap exactly.
    np.testing.assert_array_equal(
        fig.data[0].labels,
        fig_check.data[0].labels,
        'Label(s) mismatch'
    )
    np.testing.assert_array_equal(
        fig.data[0].parents,
        fig_check.data[0].parents,
        'Parent(s) mismatch'
    )
    np.testing.assert_array_equal(
        fig.data[0].values,
        fig_check.data[0].values,
        'Value(s) mismatch'
    )
except Exception as err:
    did_pass = False
    message = err
    if not is_autograder_env:
        raise err
finally:
    if is_autograder_env:
        record_part(part_name, did_pass, available_points, message)
{'netid': 'grader_netid', 'order': ['deliverable-01', 'deliverable-02', 'deliverable-03', 'deliverable-04', 'deliverable-05', 'deliverable-06', 'deliverable-07', 'deliverable-08', 'deliverable-09', 'deliverable-10', 'deliverable-11', 'deliverable-12', 'deliverable-13', 'deliverable-14', 'deliverable-15', 'deliverable-16', 'deliverable-17', 'deliverable-18', 'deliverable-19', 'deliverable-20', 'deliverable-21', 'deliverable-22', 'deliverable-23', 'deliverable-24'], 'student_score': 80, 'total_available': 80, 'breakdown': {'deliverable-01': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-02': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-03': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-04': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-05': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-06': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-07': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-08': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-09': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-10': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-11': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-12': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-13': {'available_points': 4, 'points': 4, 'did_pass': True, 'message': ''}, 'deliverable-14': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-15': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-16': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-17': {'available_points': 2, 
'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-18': {'available_points': 3, 'points': 3, 'did_pass': True, 'message': ''}, 'deliverable-19': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-20': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-21': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-22': {'available_points': 5, 'points': 5, 'did_pass': True, 'message': ''}, 'deliverable-23': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}, 'deliverable-24': {'available_points': 2, 'points': 2, 'did_pass': True, 'message': ''}}, 'assignment_name': 'case-study-03-ridesharing-trips-dataviz'}
There is one final step before exporting the notebook as an .ipynb file for submission. You should restart your runtime (kernel) and run all cells from the beginning to ensure that your notebook is structured properly.
Go to the "Runtime" ("Kernel" if you're on Jupyter Lab) menu on top. Select "Restart and run all". Failing to pass this step may result in significant loss of points since the autograder will fail to run.
# Final student-facing cell: when the notebook is run top to bottom, seeing
# this message indicates that execution reached the end of the notebook.
print('🎯 Restart and run all successful')
🎯 Restart and run all successful
# GRADER_ONLY
# Build a plain-text feedback report from `autograded_result` and, when running
# under the autograder, upsert this submission's row into the results CSV.
import os
from datetime import datetime

autograder_output_filename = f"{assignment_name}-autograder-results.csv"
netid = autograded_result["netid"]
assignment_name = autograded_result["assignment_name"]
student_score = autograded_result["student_score"]
total_available = autograded_result["total_available"]
graded_time = datetime.now().strftime("%Y-%m-%d %H:%M")

# Report header, followed by one section per graded part in grading order.
feedback_sections = [
    f"Assignment: {assignment_name}\n"
    f"NetID: {netid}\n"
    f"Total Score: {student_score}/{total_available}\n"
    "Grade Breakdown"
]
for graded_part_name in autograded_result["order"]:
    part_info = autograded_result["breakdown"][graded_part_name]
    result_label = "Pass" if part_info["did_pass"] else "Fail"
    section = (
        "\n--------------------------------"
        f"\nComponent: {graded_part_name}"
        f"\nResult: {result_label}"
        f"\nScore: {part_info['points']}/{part_info['available_points']}"
    )
    # Only failed parts carry an error message.
    if not part_info["did_pass"]:
        section += f"\nError Message: {part_info['message']}"
    feedback_sections.append(section)
feedback = "".join(feedback_sections)

if is_autograder_env:
    df_agr_new = pd.DataFrame([{
        "netid": netid,
        "assignment_name": assignment_name,
        "student_score": student_score,
        "total_available": total_available,
        "graded_time": graded_time,
        "feedback": feedback
    }])

    if not os.path.exists(autograder_output_filename):
        df_agr = df_agr_new
    else:
        df_agr = pd.read_csv(autograder_output_filename)
        # Drop any earlier row for the same student and assignment so a
        # regrade replaces the previous result instead of duplicating it.
        is_regrade = (df_agr["netid"] == netid) & (df_agr["assignment_name"] == assignment_name)
        df_agr = pd.concat([df_agr.loc[~is_regrade], df_agr_new])

    # `index` is a boolean option; pass False explicitly to omit the row index.
    df_agr.sort_values("graded_time").to_csv(autograder_output_filename, index=False)